summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2023-08-04 03:31:52 +0200
committerFernando Sahmkow <fsahmkow27@gmail.com>2023-09-23 23:05:29 +0200
commitbdc01254a9b3ce8359f8f007c2102cb2d112418e (patch)
treeb75b974c0751f83089d64957df567e0d138981b1
parentMerge pull request #11567 from liamwhite/fixing-my-error (diff)
downloadyuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.gz
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.bz2
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.lz
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.xz
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.zst
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.zip
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/query_cache/bank_base.h106
-rw-r--r--src/video_core/query_cache/query_base.h72
-rw-r--r--src/video_core/query_cache/query_cache.h543
-rw-r--r--src/video_core/query_cache/query_cache_base.h181
-rw-r--r--src/video_core/query_cache/query_stream.h125
-rw-r--r--src/video_core/query_cache/types.h74
7 files changed, 1107 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 9b13ccbab..cf9266d54 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -95,6 +95,12 @@ add_library(video_core STATIC
memory_manager.h
precompiled_headers.h
pte_kind.h
+ query_cache/bank_base.h
+ query_cache/query_base.h
+ query_cache/query_cache_base.h
+ query_cache/query_cache.h
+ query_cache/query_stream.h
+ query_cache/types.h
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h
new file mode 100644
index 000000000..4246a609d
--- /dev/null
+++ b/src/video_core/query_cache/bank_base.h
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <deque>
+#include <utility>
+
+
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class BankBase {
+protected:
+ const size_t base_bank_size;
+ size_t bank_size;
+ std::atomic<size_t> references;
+ size_t current_slot;
+
+public:
+ BankBase(size_t bank_size_)
+ : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
+
+ virtual ~BankBase() = default;
+
+ virtual std::pair<bool, size_t> Reserve() {
+ if (IsClosed()) {
+ return {false, bank_size};
+ }
+ const size_t result = current_slot++;
+ return {true, result};
+ }
+
+ virtual void Reset() {
+ current_slot = 0;
+ references = 0;
+ bank_size = base_bank_size;
+ }
+
+ size_t Size() const {
+ return bank_size;
+ }
+
+ void AddReference(size_t how_many = 1) {
+ references.fetch_add(how_many, std::memory_order_relaxed);
+ }
+
+ void CloseReference(size_t how_many = 1) {
+ if (how_many > references.load(std::memory_order_relaxed)) {
+ UNREACHABLE();
+ }
+ references.fetch_sub(how_many, std::memory_order_relaxed);
+ }
+
+ void Close() {
+ bank_size = current_slot;
+ }
+
+ constexpr bool IsClosed() {
+ return current_slot >= bank_size;
+ }
+
+ bool IsDead() {
+ return IsClosed() && references == 0;
+ }
+};
+
+template <typename BankType>
+class BankPool {
+private:
+ std::deque<BankType> bank_pool;
+ std::deque<size_t> bank_indices;
+
+public:
+ BankPool() = default;
+ ~BankPool() = default;
+
+ // Reserve a bank from the pool and return its index
+ template <typename Func>
+ size_t ReserveBank(Func&& builder) {
+ if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
+ size_t new_index = bank_indices.front();
+ bank_indices.pop_front();
+ bank_pool[new_index].Reset();
+ return new_index;
+ }
+ size_t new_index = bank_pool.size();
+ builder(bank_pool, new_index);
+ bank_indices.push_back(new_index);
+ return new_index;
+ }
+
+ // Get a reference to a bank using its index
+ BankType& GetBank(size_t index) {
+ return bank_pool[index];
+ }
+
+ // Get the total number of banks in the pool
+ size_t BankCount() const {
+ return bank_pool.size();
+ }
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
new file mode 100644
index 000000000..485ed669c
--- /dev/null
+++ b/src/video_core/query_cache/query_base.h
@@ -0,0 +1,72 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+enum class QueryFlagBits : u32 {
+ HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp.
+ IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
+ IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
+ IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
+ IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
+ IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
+ IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
+ IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
+ IsFence = 1 << 8, ///< Indicates the query is a fence.
+};
+DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
+
+class QueryBase {
+public:
+ VAddr guest_address;
+ QueryFlagBits flags;
+ u64 value;
+
+protected:
+ // Default constructor
+ QueryBase() : guest_address(0), flags{}, value{} {}
+
+ // Parameterized constructor
+ QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
+ : guest_address(address), flags(flags_), value{value_} {}
+};
+
+class GuestQuery : public QueryBase {
+public:
+ // Parameterized constructor
+ GuestQuery(bool isLong, VAddr address, u64 queryValue)
+ : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) {
+ if (isLong) {
+ flags |= QueryFlagBits::HasTimestamp;
+ }
+ }
+};
+
+class HostQueryBase : public QueryBase {
+public:
+ // Default constructor
+ HostQueryBase()
+ : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
+ size_banks{}, start_slot{}, size_slots{} {}
+
+ // Parameterized constructor
+ HostQueryBase(bool isLong, VAddr address)
+ : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
+ start_slot{}, size_slots{} {
+ if (isLong) {
+ flags |= QueryFlagBits::HasTimestamp;
+ }
+ }
+
+ u32 start_bank_id;
+ u32 size_banks;
+ size_t start_slot;
+ size_t size_slots;
+};
+
+} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
new file mode 100644
index 000000000..f6af48d14
--- /dev/null
+++ b/src/video_core/query_cache/query_cache.h
@@ -0,0 +1,543 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <array>
+#include <deque>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/query_cache/bank_base.h"
+#include "video_core/query_cache/query_base.h"
+#include "video_core/query_cache/query_cache_base.h"
+#include "video_core/query_cache/query_stream.h"
+#include "video_core/query_cache/types.h"
+
+namespace VideoCommon {
+
+using Maxwell = Tegra::Engines::Maxwell3D;
+
+struct SyncValuesStruct {
+ VAddr address;
+ u64 value;
+ u64 size;
+
+ static constexpr bool GeneratesBaseBuffer = true;
+};
+
+template <typename Traits>
+class GuestStreamer : public SimpleStreamer<GuestQuery> {
+public:
+ using RuntimeType = typename Traits::RuntimeType;
+
+ GuestStreamer(size_t id_, RuntimeType& runtime_)
+ : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {}
+
+ virtual ~GuestStreamer() = default;
+
+ size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+ std::optional<u32> subreport = std::nullopt) override {
+ auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value));
+ pending_sync.push_back(new_id);
+ return new_id;
+ }
+
+ bool HasPendingSync() override {
+ return !pending_sync.empty();
+ }
+
+ void SyncWrites() override {
+ if (pending_sync.empty()) {
+ return;
+ }
+ std::vector<SyncValuesStruct> sync_values;
+ sync_values.reserve(pending_sync.size());
+ for (size_t pending_id : pending_sync) {
+ auto& query = slot_queries[pending_id];
+ if (True(query.flags & QueryFlagBits::IsRewritten) ||
+ True(query.flags & QueryFlagBits::IsInvalidated)) {
+ continue;
+ }
+ query.flags |= QueryFlagBits::IsHostSynced;
+ sync_values.emplace_back(query.guest_address, query.value,
+ True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4);
+ }
+ pending_sync.clear();
+ if (sync_values.size() > 0) {
+ runtime.template SyncValues<SyncValuesStruct>(sync_values);
+ }
+ }
+
+private:
+ RuntimeType& runtime;
+ std::deque<size_t> pending_sync;
+};
+
+template <typename Traits>
+class StubStreamer : public GuestStreamer<Traits> {
+public:
+ using RuntimeType = typename Traits::RuntimeType;
+
+ StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {}
+
+ ~StubStreamer() override = default;
+
+ size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
+ std::optional<u32> subreport = std::nullopt) override {
+ size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport);
+ return new_id;
+ }
+};
+
+template <typename Traits>
+struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
+ using RuntimeType = typename Traits::RuntimeType;
+
+ QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
+ Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
+ : owner{owner_}, rasterizer{rasterizer_},
+ cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
+ streamer_mask = 0;
+ for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
+ streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
+ if (streamers[i]) {
+ streamer_mask |= 1ULL << i;
+ }
+ }
+ }
+
+ template <typename Func>
+ void ForEachStreamerIn(u64 mask, Func&& func) {
+ static constexpr bool RETURNS_BOOL =
+ std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>;
+ while (mask != 0) {
+ size_t position = std::countr_zero(mask);
+ mask &= ~(1ULL << position);
+ if constexpr (RETURNS_BOOL) {
+ if (func(streamers[position])) {
+ return;
+ }
+ } else {
+ func(streamers[position]);
+ }
+ }
+ }
+
+ template <typename Func>
+ void ForEachStreamer(Func&& func) {
+ ForEachStreamerIn(streamer_mask, func);
+ }
+
+ QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) {
+ size_t which_stream = location.stream_id.Value();
+ auto* streamer = streamers[which_stream];
+ if (!streamer) {
+ return nullptr;
+ }
+ return streamer->GetQuery(location.query_id.Value());
+ }
+
+ QueryCacheBase<Traits>* owner;
+ VideoCore::RasterizerInterface& rasterizer;
+ Core::Memory::Memory& cpu_memory;
+ Traits::RuntimeType& runtime;
+ Tegra::GPU& gpu;
+ std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
+ u64 streamer_mask;
+ std::mutex flush_guard;
+ std::deque<u64> flushes_pending;
+ std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
+};
+
+template <typename Traits>
+QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
+ VideoCore::RasterizerInterface& rasterizer_,
+ Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
+ : cached_queries{} {
+ impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
+ this, rasterizer_, cpu_memory_, runtime_, gpu_);
+}
+
+template <typename Traits>
+QueryCacheBase<Traits>::~QueryCacheBase() = default;
+
+template <typename Traits>
+void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) {
+ size_t index = static_cast<size_t>(counter_type);
+ StreamerInterface* streamer = impl->streamers[index];
+ if (!streamer) [[unlikely]] {
+ UNREACHABLE();
+ return;
+ }
+ if (is_enabled) {
+ streamer->StartCounter();
+ } else {
+ streamer->PauseCounter();
+ }
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
+ size_t index = static_cast<size_t>(counter_type);
+ StreamerInterface* streamer = impl->streamers[index];
+ if (!streamer) [[unlikely]] {
+ UNREACHABLE();
+ return;
+ }
+ streamer->CloseCounter();
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
+ size_t index = static_cast<size_t>(counter_type);
+ StreamerInterface* streamer = impl->streamers[index];
+ if (!streamer) [[unlikely]] {
+ UNIMPLEMENTED();
+ return;
+ }
+ streamer->ResetCounter();
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::BindToChannel(s32 id) {
+ VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id);
+ impl->runtime.Bind3DEngine(maxwell3d);
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
+ QueryPropertiesFlags flags, u32 payload, u32 subreport) {
+ const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout);
+ const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
+ size_t streamer_id = static_cast<size_t>(counter_type);
+ auto* streamer = impl->streamers[streamer_id];
+ if (!streamer) [[unlikely]] {
+ if (has_timestamp) {
+ u64 timestamp = impl->gpu.GetTicks();
+ gpu_memory->Write<u64>(addr + 8, timestamp);
+ gpu_memory->Write<u64>(addr, 1ULL);
+ } else {
+ gpu_memory->Write<u32>(addr, 1U);
+ }
+ return;
+ }
+ auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
+ if (!cpu_addr_opt) [[unlikely]] {
+ return;
+ }
+ VAddr cpu_addr = *cpu_addr_opt;
+ const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
+ auto* query = streamer->GetQuery(new_query_id);
+ if (is_fence) {
+ query->flags |= QueryFlagBits::IsFence;
+ }
+ QueryLocation query_location{};
+ query_location.stream_id.Assign(static_cast<u32>(streamer_id));
+ query_location.query_id.Assign(static_cast<u32>(new_query_id));
+ const auto gen_caching_indexing = [](VAddr cur_addr) {
+ return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
+ static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
+ };
+ u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
+ u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
+ bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
+ std::function<void()> operation(
+ [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] {
+ if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
+ if (!is_synced) [[likely]] {
+ impl->pending_unregister.push_back(query_location);
+ }
+ return;
+ }
+ if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
+ UNREACHABLE();
+ return;
+ }
+ if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
+ u64 timestamp = impl->gpu.GetTicks();
+ std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
+ std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
+ } else {
+ u32 value = static_cast<u32>(query_base->value);
+ std::memcpy(pointer, &value, sizeof(value));
+ }
+ if (!is_synced) [[likely]] {
+ impl->pending_unregister.push_back(query_location);
+ }
+ });
+ if (is_fence) {
+ impl->rasterizer.SignalFence(std::move(operation));
+ } else {
+ impl->rasterizer.SyncOperation(std::move(operation));
+ }
+ if (is_synced) {
+ streamer->Free(new_query_id);
+ return;
+ }
+ auto [cont_addr, base] = gen_caching_indexing(cpu_addr);
+ {
+ std::scoped_lock lock(cache_mutex);
+ auto it1 = cached_queries.try_emplace(cont_addr);
+ auto& sub_container = it1.first->second;
+ auto it_current = sub_container.find(base);
+ if (it_current == sub_container.end()) {
+ sub_container.insert_or_assign(base, query_location);
+ return;
+ }
+ auto* old_query = impl->ObtainQuery(it_current->second);
+ old_query->flags |= QueryFlagBits::IsRewritten;
+ sub_container.insert_or_assign(base, query_location);
+ }
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::UnregisterPending() {
+ const auto gen_caching_indexing = [](VAddr cur_addr) {
+ return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
+ static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
+ };
+ std::scoped_lock lock(cache_mutex);
+ for (QueryLocation loc : impl->pending_unregister) {
+ const auto [streamer_id, query_id] = loc.unpack();
+ auto* streamer = impl->streamers[streamer_id];
+ if (!streamer) [[unlikely]] {
+ continue;
+ }
+ auto* query = streamer->GetQuery(query_id);
+ auto [cont_addr, base] = gen_caching_indexing(query->guest_address);
+ auto it1 = cached_queries.find(cont_addr);
+ if (it1 != cached_queries.end()) {
+ auto it2 = it1->second.find(base);
+ if (it2 != it1->second.end()) {
+ if (it2->second.raw == loc.raw) {
+ it1->second.erase(it2);
+ }
+ }
+ }
+ streamer->Free(query_id);
+ }
+ impl->pending_unregister.clear();
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::NotifyWFI() {
+ bool should_sync = false;
+ impl->ForEachStreamer(
+ [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); });
+ if (!should_sync) {
+ return;
+ }
+
+ impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); });
+ impl->runtime.Barriers(true);
+ impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); });
+ impl->runtime.Barriers(false);
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::NotifySegment(bool resume) {
+ if (resume) {
+ impl->runtime.ResumeHostConditionalRendering();
+ } else {
+ impl->runtime.PauseHostConditionalRendering();
+ CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
+ CounterClose(VideoCommon::QueryType::StreamingByteCount);
+ }
+}
+
+template <typename Traits>
+bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
+ bool qc_dirty = false;
+ const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData {
+ auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address);
+ if (!cpu_addr_opt) [[unlikely]] {
+ return VideoCommon::LookupData{
+ .address = 0,
+ .found_query = nullptr,
+ };
+ }
+ VAddr cpu_addr = *cpu_addr_opt;
+ std::scoped_lock lock(cache_mutex);
+ auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS);
+ if (it1 == cached_queries.end()) {
+ return VideoCommon::LookupData{
+ .address = cpu_addr,
+ .found_query = nullptr,
+ };
+ }
+ auto& sub_container = it1->second;
+ auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK);
+
+ if (it_current == sub_container.end()) {
+ auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4);
+ if (it_current_2 == sub_container.end()) {
+ return VideoCommon::LookupData{
+ .address = cpu_addr,
+ .found_query = nullptr,
+ };
+ }
+ }
+ auto* query = impl->ObtainQuery(it_current->second);
+ qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) &&
+ False(query->flags & QueryFlagBits::IsGuestSynced);
+ return VideoCommon::LookupData{
+ .address = cpu_addr,
+ .found_query = query,
+ };
+ };
+
+ auto& regs = maxwell3d->regs;
+ if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) {
+ impl->runtime.EndHostConditionalRendering();
+ return false;
+ }
+ /*if (!Settings::IsGPULevelHigh()) {
+ impl->runtime.EndHostConditionalRendering();
+ return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
+ VideoCommon::CacheType::BufferCache |
+ VideoCommon::CacheType::QueryCache);
+ }*/
+ const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
+ const GPUVAddr address = regs.render_enable.Address();
+ switch (mode) {
+ case ComparisonMode::True:
+ impl->runtime.EndHostConditionalRendering();
+ return false;
+ case ComparisonMode::False:
+ impl->runtime.EndHostConditionalRendering();
+ return false;
+ case ComparisonMode::Conditional: {
+ VideoCommon::LookupData object_1{gen_lookup(address)};
+ return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty);
+ }
+ case ComparisonMode::IfEqual: {
+ VideoCommon::LookupData object_1{gen_lookup(address)};
+ VideoCommon::LookupData object_2{gen_lookup(address + 16)};
+ return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
+ true);
+ }
+ case ComparisonMode::IfNotEqual: {
+ VideoCommon::LookupData object_1{gen_lookup(address)};
+ VideoCommon::LookupData object_2{gen_lookup(address + 16)};
+ return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
+ false);
+ }
+ default:
+ return false;
+ }
+}
+
+// Async downloads
+template <typename Traits>
+void QueryCacheBase<Traits>::CommitAsyncFlushes() {
+ u64 mask{};
+ {
+ std::scoped_lock lk(impl->flush_guard);
+ impl->ForEachStreamer([&mask](StreamerInterface* streamer) {
+ bool local_result = streamer->HasUnsyncedQueries();
+ if (local_result) {
+ mask |= 1ULL << streamer->GetId();
+ }
+ });
+ impl->flushes_pending.push_back(mask);
+ }
+ std::function<void()> func([this] { UnregisterPending(); });
+ impl->rasterizer.SyncOperation(std::move(func));
+ if (mask == 0) {
+ return;
+ }
+ impl->ForEachStreamerIn(mask,
+ [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); });
+}
+
+template <typename Traits>
+bool QueryCacheBase<Traits>::HasUncommittedFlushes() const {
+ bool result = false;
+ impl->ForEachStreamer([&result](StreamerInterface* streamer) {
+ result |= streamer->HasUnsyncedQueries();
+ return result;
+ });
+ return result;
+}
+
+template <typename Traits>
+bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() {
+ std::scoped_lock lk(impl->flush_guard);
+ return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL;
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::PopAsyncFlushes() {
+ u64 mask;
+ {
+ std::scoped_lock lk(impl->flush_guard);
+ mask = impl->flushes_pending.front();
+ impl->flushes_pending.pop_front();
+ }
+ if (mask == 0) {
+ return;
+ }
+ impl->ForEachStreamerIn(mask,
+ [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); });
+}
+
+// Invalidation
+
+template <typename Traits>
+void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) {
+ auto* query_base = impl->ObtainQuery(location);
+ if (!query_base) {
+ return;
+ }
+ query_base->flags |= QueryFlagBits::IsInvalidated;
+}
+
+template <typename Traits>
+bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
+ auto* query_base = impl->ObtainQuery(location);
+ if (!query_base) {
+ return false;
+ }
+ return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
+ False(query_base->flags & QueryFlagBits::IsGuestSynced);
+}
+
+template <typename Traits>
+bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
+ auto* query_base = impl->ObtainQuery(location);
+ if (!query_base) {
+ return false;
+ }
+ if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
+ False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
+ auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address);
+ if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
+ std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
+ return false;
+ }
+ u32 value_l = static_cast<u32>(query_base->value);
+ std::memcpy(ptr, &value_l, sizeof(value_l));
+ return false;
+ }
+ return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
+ False(query_base->flags & QueryFlagBits::IsGuestSynced);
+}
+
+template <typename Traits>
+void QueryCacheBase<Traits>::RequestGuestHostSync() {
+ impl->rasterizer.ReleaseFences();
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
new file mode 100644
index 000000000..55f508dd1
--- /dev/null
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -0,0 +1,181 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <functional>
+#include <mutex>
+#include <optional>
+#include <span>
+#include <unordered_map>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/query_cache/query_base.h"
+#include "video_core/query_cache/types.h"
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+class GPU;
+}
+
+namespace VideoCommon {
+
+struct LookupData {
+ VAddr address;
+ QueryBase* found_query;
+};
+
+template <typename Traits>
+class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
+ using RuntimeType = typename Traits::RuntimeType;
+
+public:
+ union QueryLocation {
+ BitField<27, 5, u32> stream_id;
+ BitField<0, 27, u32> query_id;
+ u32 raw;
+
+ std::pair<size_t, size_t> unpack() {
+ return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
+ }
+ };
+
+ explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
+ Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);
+
+ ~QueryCacheBase();
+
+ void InvalidateRegion(VAddr addr, std::size_t size) {
+ IterateCache<true>(addr, size,
+ [this](QueryLocation location) { InvalidateQuery(location); });
+ }
+
+ void FlushRegion(VAddr addr, std::size_t size) {
+ bool result = false;
+ IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
+ result |= SemiFlushQueryDirty(location);
+ return result;
+ });
+ if (result) {
+ RequestGuestHostSync();
+ }
+ }
+
+ static u64 BuildMask(std::span<QueryType> types) {
+ u64 mask = 0;
+ for (auto query_type : types) {
+ mask |= 1ULL << (static_cast<u64>(query_type));
+ }
+ return mask;
+ }
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) {
+ bool result = false;
+ IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
+ result |= IsQueryDirty(location);
+ return result;
+ });
+ return result;
+ }
+
+ void CounterEnable(QueryType counter_type, bool is_enabled);
+
+ void CounterReset(QueryType counter_type);
+
+ void CounterClose(QueryType counter_type);
+
+ void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,
+ u32 payload, u32 subreport);
+
+ void NotifyWFI();
+
+ bool AccelerateHostConditionalRendering();
+
+ // Async downloads
+ void CommitAsyncFlushes();
+
+ bool HasUncommittedFlushes() const;
+
+ bool ShouldWaitAsyncFlushes();
+
+ void PopAsyncFlushes();
+
+ void NotifySegment(bool resume);
+
+ void BindToChannel(s32 id) override;
+
+protected:
+ template <bool remove_from_cache, typename Func>
+ void IterateCache(VAddr addr, std::size_t size, Func&& func) {
+ static constexpr bool RETURNS_BOOL =
+ std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>;
+ const u64 addr_begin = addr;
+ const u64 addr_end = addr_begin + size;
+
+ const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS;
+ std::scoped_lock lock(cache_mutex);
+ for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) {
+ const u64 page_start = page << Core::Memory::YUZU_PAGEBITS;
+ const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
+ const u64 cache_begin = page_start + query_location;
+ const u64 cache_end = cache_begin + sizeof(u32);
+ return cache_begin < addr_end && addr_begin < cache_end;
+ };
+ const auto& it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ continue;
+ }
+ auto& contents = it->second;
+ for (auto& query : contents) {
+ if (!in_range(query.first)) {
+ continue;
+ }
+ if constexpr (RETURNS_BOOL) {
+ if (func(query.second)) {
+ return;
+ }
+ } else {
+ func(query.second);
+ }
+ }
+ if constexpr (remove_from_cache) {
+ const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) {
+ return in_range(pair.first);
+ };
+ std::erase_if(contents, in_range2);
+ }
+ }
+ }
+
+ using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
+
+ void InvalidateQuery(QueryLocation location);
+ bool IsQueryDirty(QueryLocation location);
+ bool SemiFlushQueryDirty(QueryLocation location);
+ void RequestGuestHostSync();
+ void UnregisterPending();
+
+ std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries;
+ std::mutex cache_mutex;
+
+ struct QueryCacheBaseImpl;
+ friend struct QueryCacheBaseImpl;
+ friend RuntimeType;
+
+ std::unique_ptr<struct QueryCacheBaseImpl> impl;
+};
+
+} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
new file mode 100644
index 000000000..dd5f95b3c
--- /dev/null
+++ b/src/video_core/query_cache/query_stream.h
@@ -0,0 +1,125 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <deque>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/query_cache/bank_base.h"
+#include "video_core/query_cache/query_base.h"
+
+namespace VideoCommon {
+
+class StreamerInterface {
+public:
+ StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {}
+ virtual ~StreamerInterface() = default;
+
+ virtual QueryBase* GetQuery(size_t id) = 0;
+
+ virtual void StartCounter() {
+ /* Do Nothing */
+ }
+
+ virtual void PauseCounter() {
+ /* Do Nothing */
+ }
+
+ virtual void ResetCounter() {
+ /* Do Nothing */
+ }
+
+ virtual void CloseCounter() {
+ /* Do Nothing */
+ }
+
+ virtual bool HasPendingSync() {
+ return false;
+ }
+
+ virtual void PresyncWrites() {
+ /* Do Nothing */
+ }
+
+ virtual void SyncWrites() {
+ /* Do Nothing */
+ }
+
+ virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+ std::optional<u32> subreport = std::nullopt) = 0;
+
+ virtual bool HasUnsyncedQueries() {
+ return false;
+ }
+
+ virtual void PushUnsyncedQueries() {
+ /* Do Nothing */
+ }
+
+ virtual void PopUnsyncedQueries() {
+ /* Do Nothing */
+ }
+
+ virtual void Free(size_t query_id) = 0;
+
+ size_t GetId() const {
+ return id;
+ }
+
+protected:
+ const size_t id;
+ const u64 dependance_mask;
+};
+
+template <typename QueryType>
+class SimpleStreamer : public StreamerInterface {
+public:
+ SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
+ virtual ~SimpleStreamer() = default;
+
+protected:
+ virtual QueryType* GetQuery(size_t query_id) override {
+ if (query_id < slot_queries.size()) {
+ return &slot_queries[query_id];
+ }
+ return nullptr;
+ }
+
+ virtual void Free(size_t query_id) override {
+ std::scoped_lock lk(guard);
+ ReleaseQuery(query_id);
+ }
+
+ template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))>
+ size_t BuildQuery(Args&&... args) {
+ std::scoped_lock lk(guard);
+ if (!old_queries.empty()) {
+ size_t new_id = old_queries.front();
+ old_queries.pop_front();
+ new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...);
+ return new_id;
+ }
+ size_t new_id = slot_queries.size();
+ slot_queries.emplace_back(std::forward<Args>(args)...);
+ return new_id;
+ }
+
+ void ReleaseQuery(size_t query_id) {
+
+ if (query_id < slot_queries.size()) {
+ old_queries.push_back(query_id);
+ return;
+ }
+ UNREACHABLE();
+ }
+
+ std::mutex guard;
+ std::deque<QueryType> slot_queries;
+ std::deque<size_t> old_queries;
+};
+
+} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h
new file mode 100644
index 000000000..e9226bbfc
--- /dev/null
+++ b/src/video_core/query_cache/types.h
@@ -0,0 +1,74 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+enum class QueryPropertiesFlags : u32 {
+ HasTimeout = 1 << 0,
+ IsAFence = 1 << 1,
+};
+DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags)
+
+// This should always be equivalent to maxwell3d Report Semaphore Reports
+enum class QueryType : u32 {
+ Payload = 0, // "None" in docs, but confirmed via hardware to return the payload
+ VerticesGenerated = 1,
+ ZPassPixelCount = 2,
+ PrimitivesGenerated = 3,
+ AlphaBetaClocks = 4,
+ VertexShaderInvocations = 5,
+ StreamingPrimitivesNeededMinusSucceeded = 6,
+ GeometryShaderInvocations = 7,
+ GeometryShaderPrimitivesGenerated = 9,
+ ZCullStats0 = 10,
+ StreamingPrimitivesSucceeded = 11,
+ ZCullStats1 = 12,
+ StreamingPrimitivesNeeded = 13,
+ ZCullStats2 = 14,
+ ClipperInvocations = 15,
+ ZCullStats3 = 16,
+ ClipperPrimitivesGenerated = 17,
+ VtgPrimitivesOut = 18,
+ PixelShaderInvocations = 19,
+ ZPassPixelCount64 = 21,
+ IEEECleanColorTarget = 24,
+ IEEECleanZetaTarget = 25,
+ StreamingByteCount = 26,
+ TessellationInitInvocations = 27,
+ BoundingRectangle = 28,
+ TessellationShaderInvocations = 29,
+ TotalStreamingPrimitivesNeededMinusSucceeded = 30,
+ TessellationShaderPrimitivesGenerated = 31,
+ // max.
+ MaxQueryTypes,
+};
+
+// Comparison modes for Host Conditional Rendering
+enum class ComparisonMode : u32 {
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ IfEqual = 3,
+ IfNotEqual = 4,
+ MaxComparisonMode,
+};
+
+// Reduction ops.
+enum class ReductionOp : u32 {
+ RedAdd = 0,
+ RedMin = 1,
+ RedMax = 2,
+ RedInc = 3,
+ RedDec = 4,
+ RedAnd = 5,
+ RedOr = 6,
+ RedXor = 7,
+ MaxReductionOp,
+};
+
+} // namespace VideoCommon \ No newline at end of file